import os

import requests
from ddddocr import DdddOcr
from lxml import etree

from util.time_stamp import get_time_stamp

# Script flow:
#   1. GET the login page and read its hidden ASP.NET form fields.
#   2. Download a captcha image, recognise it with OCR and POST the login form;
#      retry with a fresh captcha while the site reports a wrong code.
#   3. GET the collection page with the logged-in session and print each entry.

LOGIN_URL = 'https://www.gushiwen.cn/user/login.aspx'
CAPTCHA_URL = 'https://www.gushiwen.cn/RandCode.ashx'
COLLECTION_URL = 'https://www.gushiwen.cn/user/collect.aspx'
CAPTCHA_IMAGE_PATH = "./static/imgs/code.png"
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever
MAX_LOGIN_ATTEMPTS = 10  # upper bound on captcha retries

# NOTE(review): these credentials were hard-coded in the script. They are kept
# as fallbacks so existing runs keep working, but they should be supplied via
# the environment and the literals removed from version control.
LOGIN_EMAIL = os.environ.get('GUSHIWEN_EMAIL', '15003884159')
LOGIN_PWD = os.environ.get('GUSHIWEN_PWD', 'Fan030311.')


def _hidden_field(doc, field_id):
    """Return the ``value`` attribute of the ``<input>`` with id *field_id*.

    Exits the script with an explicit message when the field is missing
    instead of failing with a bare ``IndexError``.
    """
    values = doc.xpath(f'//input[@id="{field_id}"]/@value') if doc is not None else []
    if not values:
        raise SystemExit(f"登录页缺少隐藏域 {field_id}")
    return values[0]


# One session for the whole flow: it stores the cookies of every response
# (login page, captcha, login POST including redirect hops) and sends them
# back automatically, so cookies no longer have to be forwarded by hand.
session = requests.Session()

# 1. Fetch the hidden form fields
res_login_get = session.get(LOGIN_URL, timeout=REQUEST_TIMEOUT)
res_login_get.raise_for_status()
tree = etree.HTML(res_login_get.text)
ocr = DdddOcr(show_ad=False)

view_state = _hidden_field(tree, '__VIEWSTATE')
view_state_generator = _hidden_field(tree, '__VIEWSTATEGENERATOR')

# The captcha image is saved to disk on every attempt; make sure the target
# directory exists so the write cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(CAPTCHA_IMAGE_PATH), exist_ok=True)

# 2. Solve the captcha and log in
for _ in range(MAX_LOGIN_ATTEMPTS):
    # The original interpolated the function object itself into the URL;
    # call it so ``t`` is an actual timestamp. The callable() guard keeps the
    # script working should the imported name turn out to be a plain value.
    timestamp = get_time_stamp() if callable(get_time_stamp) else get_time_stamp
    res_img = session.get(f'{CAPTCHA_URL}?t={timestamp}', timeout=REQUEST_TIMEOUT)
    res_img.raise_for_status()
    with open(CAPTCHA_IMAGE_PATH, "wb") as f:
        f.write(res_img.content)

    code = ocr.classification(res_img.content)
    print(code)

    # The captcha answer is tied to the cookie (``codeyz``) set by the captcha
    # response; the session sends that cookie along with this POST.
    res_login_post = session.post(
        LOGIN_URL,
        data={
            '__VIEWSTATE': view_state,
            '__VIEWSTATEGENERATOR': view_state_generator,
            'email': LOGIN_EMAIL,
            'pwd': LOGIN_PWD,
            'denglu': '登录',
            'code': str(code),
        },
        timeout=REQUEST_TIMEOUT,
    )

    if "您输入的验证码有误" in res_login_post.text:
        print("验证码识别失败， 刷新验证码再次识别 ")
    else:
        # NOTE(review): success is inferred only from the absence of the
        # captcha error text; other failures (e.g. a wrong password) would
        # also end up here. Confirm against the site's other error messages.
        print("登录成功")
        break
else:
    # Every attempt was rejected: stop instead of looping forever.
    raise SystemExit(f"验证码连续识别失败 {MAX_LOGIN_ATTEMPTS} 次，已停止重试")

# 3. List the user's collection using the logged-in session
res_collection = session.get(COLLECTION_URL, timeout=REQUEST_TIMEOUT)
tree = etree.HTML(res_collection.text)

items = tree.xpath('//div[@class="sons"]/div[@class="cont"]/a')

for item in items:
    print(item.xpath('./@href'), item.xpath('./text()'))

